Developing Indicators of Habitat and Ecosystem Change in the Gulf of Maine

Study area

The focus area for this project is coastal Maine, extending from the coast to the eastern boundary of the NOAA statistical areas 511, 512, 513.

usStates <- rnaturalearth::ne_states("united states of america", returnclass = "sf")
ne_us <- usStates %>% filter(name == "Maine")
statarea <- sf::st_read(paste0(gmRi::shared.path(group = "Res_Data", folder = "Shapefiles/Statistical_Areas"), "Statistical_Areas_2010_withnames.shp"), quiet = TRUE) %>% 
  filter(Id %in% c(511, 512, 513)) %>% 
  mutate(Id = as.factor(Id))
statarea_sf <- sf::st_simplify(statarea, dTolerance = .05)
mcc_turnoff_sf <- sf::st_read(here::here("Data/Shapefiles/MCC_turnoff/MCC_turnoff.shp"), quiet = TRUE)

ggplot() +  
  geom_sf(data = statarea_sf, aes(fill = Id)) + 
  geom_sf(data= ne_us, fill = "grey") +
  geom_sf(data = mcc_turnoff_sf, fill = "transparent", color = "black") +
  scale_fill_discrete(name = "Stat area") +
  theme(panel.background = element_blank(), 
        panel.grid = element_blank(), 
        axis.title = element_blank(),
        axis.ticks = element_blank())
Study region. NOAA statistical areas are indicated as colored polygons. Maine coastal current study region indicated by the black outlined open polygon

Study region. NOAA statistical areas are indicated as colored polygons. Maine coastal current study region indicated by the black outlined open polygon

Indicators

The following indicators are used in this report

  • Surface and bottom temperature (FVCOM)
  • Surface and bottom salinity (FVCOM)
  • Maine Coastal Current Index (FVCOM)
  • Species-based lobster predator index (NOAA Trawl Survey)
# Indicators
FVCOM <- read_csv(here::here("Indicators/FVCOM_stat_area_temps.csv")) %>% 
  mutate(Year = Year -1)

sal <- read_csv(here::here("Indicators/FVCOM_stat_area_sal.csv")) %>% 
  mutate(Year = Year -1)

maineCC <- read_csv(here::here("Indicators/mcc_pca_pc1_2.csv")) %>% 
  rename("Year" = yr, "Month" = mon) %>% 
  mutate(Year = Year -1)

species_index <- read_rds(here::here("Indicators/nmfs_trawl_lobster_predator_indices.rdata")) %>% 
  rename("Year" = year) %>% 
  mutate(Year = Year -1)

mcc_turnoff_subset <- read_csv(here::here("Indicators/mcc_turnoff_subset.csv")) %>% 
  mutate(Year = Year -1)

cprData <- read_csv(here::here("Indicators/cpr_focal_pca_timeseries_period_1961-20017.csv")) %>% 
  mutate(`First Mode` = `First Mode`*-1) %>%
  rename("Year" = year,
         "FirstMode" = `First Mode`,
         "SecondMode" = `Second Mode`) %>% 
  select(Year, FirstMode, SecondMode) %>% 
  mutate(Year = Year -1)

Temperature

Source: FVCOM NECOFS Monthly Means Thredds Link Methods: * Surface and bottom layer (1 m above bathymetry) * Regridded to regular 0.1 deg grid * Averaged over NOAA statistical area

yrFVCOM <- FVCOM %>% 
  filter(Month %in% season) %>% 
  group_by(Year, stat_area) %>% 
  summarise(sur_temp = mean(sur_temp),
            bot_temp = mean(bot_temp), .groups="drop") %>% 
  mutate(stat_area = as.factor(stat_area))

tPlot1 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, sur_temp, col = stat_area)) + 
  theme_bw() +
  labs(y = "SST (deg C)")

tPlot2 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, bot_temp, col = stat_area)) + 
  theme_bw() +
  labs(y = "Bottom T (deg C)")

tPlot1/tPlot2

Salinity

Source: FVCOM NECOFS Monthly Means Thredds Link Methods: * Surface and bottom layer (1 m above bathymetry) * Regridded to regular 0.1 deg grid * Averaged over NOAA statistical area for each year

yrSal <- sal %>% 
  filter(Month %in% season) %>% 
  group_by(Year, stat_area) %>% 
  summarise(sur_sal = mean(sur_sal),
            bot_sal = mean(bot_sal)) %>% 
  mutate(stat_area = as.factor(stat_area))

sPlot1 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, sur_sal, col = stat_area)) + 
  theme_bw() +
  labs(y = "SSS (deg C)")

sPlot2 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, bot_sal, col = stat_area)) + 
  theme_bw() +
  labs(y = "Bottom S (deg C)")

sPlot1/sPlot2

Maine Coastal Current Index

Source: FVCOM NECOFS Monthly Means Thredds Link Methods: * Surface layer * Crop to Maine Coastal Current interest area * Regridded to regular 0.1 deg grid * Averaged over NOAA statistical area for each year * See MCC_index_report.Rmd for details

yrMCC <- maineCC %>% 
  filter(Month %in% season) %>% 
  group_by(Year) %>% 
  summarise(MCCPC1 = mean(PC1))

MCCPC_plot <- yrMCC %>% 
  mutate(c = ifelse(MCCPC1 > 0, 1, 2)) %>% 
  ggplot() + geom_col(aes(x = Year, y = MCCPC1, fill = as.factor(c))) + theme_bw() + theme(legend.position = "none") + labs(x = "Year", y = "Maine Coastal Current PC1")

MCCPC_maps <- mcc_turnoff_subset %>% 
  mutate(Year = lubridate::year(as.Date(Date)),
         Month = lubridate::month(as.Date(Date))) %>% 
  filter(Year %in% c(1982, 1992, 1995, 2001, 2012, 2014),
         Month %in% c(5,6,7,8)) %>% 
  group_by(Year, lat, lon) %>% 
  summarise(u = mean(u),
            v = mean(v), .groups = "drop") %>% 
  mutate(vel = sqrt(u^2+v^2), 
         PC = if_else(Year == 2010, "negative PC1", if_else(Year ==2013, "positive PC1", "neutral PC1"))) %>% 
  ggplot() + geom_sf(data= ne_us, fill = "grey") + 
  geom_segment(aes(x = lon, y = lat, xend=lon+u, yend=lat+v, color = vel), 
               arrow = arrow(angle = 10, length = unit(0.05, "inches"), type = "closed")) + 
  scale_color_viridis_c() + theme_bw() + 
  coord_sf(datum = "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0") + 
  facet_wrap(~Year, nrow = 1)+ 
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        axis.title = element_blank(), axis.text = element_blank(),
        axis.ticks = element_blank())

MCCPC_plot / MCCPC_maps

Species Based Predator Index

Source: NOAA NE Fisheries Trawl Survey Methods: * Filtered to 15 lobster predators (ASMFC 2020) * Cropped to NOAA stat areas 511, 512, 513 * Stratum abundance: abundance per trawl summed over stat area * Stratified abundance: abundance / km2 multiplied by the area of the strata

yrSpecies_index <- 
  species_index %>% 
  filter(season == "Spring",
         `stratum id` != 'Strata 511-513') %>% 
  dplyr::select(Year, `stratum id`, "predators" = `stratified abundance`)

yrSpecies_index %>% 
  ggplot() +
  geom_line(aes(Year, predators, color = `stratum id` )) + theme_bw()

CPR data

Source: Continuous Plankton Recording

cprData %>% 
  ggplot() +
  geom_line(aes(Year, FirstMode)) +
  geom_line(aes(Year, SecondMode), color = "red") + theme_bw()

allIndex <- left_join(yrFVCOM, yrSal, by = c("Year", "stat_area")) %>% 
  left_join(yrMCC, by = c("Year" = "Year")) %>% 
  left_join(yrSpecies_index, by = c("Year" = "Year",
                                    "stat_area" = "stratum id")) %>% 
  left_join(cprData, by = "Year") %>% 
  na.omit()

Indicators PCA

Data: SST, BT, SSS, BS, MCC, Predator Index Method: Principal components analysis Years: 1990-2016

PCA Summary

# PCA by stat area
indicator_pca <- allIndex %>% 
  nest(data = c(-stat_area, -Year),
       Year = Year) %>% 
  mutate(pca = map(data, ~prcomp(.x, scale. = TRUE, center = TRUE)),
         pca_index = map(pca, ~tibble("PC1" = .x$x[,1],
                                      "PC2" = .x$x[,2],
                                      "PC3" = .x$x[,3])))

index_pca <- indicator_pca %>% 
  unnest(c(pca_index, Year, data))

PCA511 <- data.frame(summary(indicator_pca$pca[[1]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "511")
PCA512 <- data.frame(summary(indicator_pca$pca[[2]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "512")
PCA513 <- data.frame(summary(indicator_pca$pca[[3]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "513")

PCA_table <- bind_rows(PCA511,PCA512,PCA513)

knitr::kable(PCA_table)
Result PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 stat_area
Standard deviation 1.901027 1.305180 0.9965156 0.8122849 0.7891246 0.5369032 0.2836144 0.1957803 511
Proportion of Variance 0.451740 0.212940 0.1241300 0.0824800 0.0778400 0.0360300 0.0100500 0.0047900 511
Cumulative Proportion 0.451740 0.664680 0.7888100 0.8712800 0.9491200 0.9851500 0.9952100 1.0000000 511
Standard deviation 1.835508 1.312145 1.0022133 0.8565829 0.8166847 0.6173233 0.2653454 0.2292375 512
Proportion of Variance 0.421140 0.215220 0.1255500 0.0917200 0.0833700 0.0476400 0.0088000 0.0065700 512
Cumulative Proportion 0.421140 0.636350 0.7619100 0.8536200 0.9369900 0.9846300 0.9934300 1.0000000 512
Standard deviation 1.612874 1.352696 1.1286219 0.9274575 0.8711510 0.6376812 0.4330075 0.2860938 513
Proportion of Variance 0.325170 0.228720 0.1592200 0.1075200 0.0948600 0.0508300 0.0234400 0.0102300 513
Cumulative Proportion 0.325170 0.553890 0.7131200 0.8206400 0.9155000 0.9663300 0.9897700 1.0000000 513

Scree Plots

scree1 <- fviz_eig(indicator_pca$pca[[1]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree2 <- fviz_eig(indicator_pca$pca[[2]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree3 <- fviz_eig(indicator_pca$pca[[3]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))

scree1 / scree2 / scree3

PC time series

PC1plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC1, color = stat_area))

PC2plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC2, color = stat_area))

PC3plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC3, color = stat_area))

PC1plot / PC2plot / PC3plot

Loadings plot

Figure 4. Loadings of the variables

Figure 4. Loadings of the variables

Global PCA

# Global PCA

globalPCA <- allIndex %>% 
  group_by(Year) %>% 
  pivot_wider(names_from = stat_area,
              values_from = c(sur_temp, bot_temp, sur_sal, bot_sal, MCCPC1, predators)) %>% 
  nest(data = c(-Year),
       Year = Year) %>% 
  mutate(pca = map(data, ~prcomp(.x, scale. = TRUE, center = TRUE)),
         pca_index = map(pca, ~tibble("PC1" = .x$x[,1],
                                      "PC2" = .x$x[,2],
                                      "PC3" = .x$x[,3])))

index_globalPCA <- globalPCA %>% 
  unnest(c(pca_index, Year, data))

globalPCA_table <- data.frame(summary(globalPCA$pca[[1]])$importance) %>% 
  rownames_to_column("Result")

knitr::kable(globalPCA_table)
Result PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19 PC20
Standard deviation 2.992895 1.937734 1.67806 1.181539 1.016098 0.7975698 0.6537792 0.5398169 0.4653806 0.3806505 0.3552168 0.2813712 0.2404182 0.2023516 0.1249607 0.0663937 0.0465431 0.0261327 0 0
Proportion of Variance 0.447870 0.187740 0.14079 0.069800 0.051620 0.0318100 0.0213700 0.0145700 0.0108300 0.0072400 0.0063100 0.0039600 0.0028900 0.0020500 0.0007800 0.0002200 0.0001100 0.0000300 0 0
Cumulative Proportion 0.447870 0.635610 0.77641 0.846210 0.897830 0.9296400 0.9510100 0.9655800 0.9764100 0.9836500 0.9899600 0.9939200 0.9968100 0.9988600 0.9996400 0.9998600 0.9999700 1.0000000 1 1
# GlobaPCA

fviz_pca_var(globalPCA$pca[[1]],
             col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE) + labs(title = "global PCA") +
  theme(legend.position = "none")

Chronolgical cluster

allIndex_ca <- allIndex %>% 
  group_by(Year) %>% 
  pivot_wider(names_from = stat_area,
              values_from = c(sur_temp, bot_temp, sur_sal, bot_sal, MCCPC1, predators)) %>% 
  column_to_rownames("Year")

allIndex_ca <- scale(allIndex_ca)

# determine number of clusters
wss <- (nrow(allIndex_ca)-1)*sum(apply(allIndex_ca,2,var)) # get sum of squares

for (i in 2:12) wss[i] <- sum(kmeans(allIndex_ca,
   centers=i)$withinss)

# look for break in plot like a scree plot
kmeans_scree <- ggplot() + 
  geom_line(aes(x = c(1:12), y = wss)) +
  labs(x = "Number of Clusters",
  y ="Within groups sum of squares")

# K-Means Cluster Analysis
fit <- kmeans(allIndex_ca, 3) # 4 cluster solution

# get cluster means
#aggregate(allIndex_ca,by=list(fit$cluster),FUN=mean)

# append cluster assignment
lobData_cluster <- data.frame(allIndex_ca, fit$cluster)

# Cluster Plot against 1st 2 principal components

# vary parameters for most readable graph
indicators_cluster <- cluster::clusplot(allIndex_ca, fit$cluster, color=TRUE, shade=TRUE,
   labels=2, lines=0)

indicators_cluster
## $Distances
## NULL
## 
## $Shading
## [1] 19.36983 13.47819 13.15198

Breakpoint analysis

Breakpoint analysis of indicators
# breapoint function
bp_analysis <- function(x){
  mod <-  lm(values ~ Year, data = x)
  o <- tryCatch(segmented::segmented(mod, seg.Z = ~Year, psi = c(2005)),  # need to estimate bp
                error = function(cond){cond})
}


# Breakpoint by stat area
indicator_breakpoint <- allIndex %>%
  pivot_longer(cols = c(sur_temp, bot_temp, sur_sal, bot_sal, MCCPC1, predators), 
               names_to = "Indicator", values_to = "values") %>% 
  nest(data = c(-stat_area, -Indicator)) %>% 
  mutate(seg = purrr::map(data, ~bp_analysis(.x)),
         bp = purrr::map(seg, ~data.frame(.x$psi)))

# get fitted values and plot over data
bp_plots <- list()
for(i in 1:length(indicator_breakpoint$data)){
  
  name <- paste0(indicator_breakpoint$stat_area[[i]], "_", indicator_breakpoint$Indicator[[i]])
  
    # get the fitted data
  fitted_df <- fitted(indicator_breakpoint$seg[[i]])
  
  if(is.null(fitted_df)){
    
    "check error log"
    
  } else {
      
    seg_model <- data.frame(Year = indicator_breakpoint$data[[i]]$Year, values = fitted_df)
  
  # plot the fitted model
  
    bp_plots[[name]] <- indicator_breakpoint$data[[i]] %>% 
      ggplot() + geom_line(aes(Year, values)) +
      geom_line(data = seg_model, aes(x = Year, y = values), col = "dark red")
  }
}

set.seed(1)
# extract the breakpoints
break_years <- indicator_breakpoint %>% 
  unnest(bp) %>% 
  select(stat_area, Indicator, Est.) %>% 
  mutate(Indicator = factor(Indicator, levels = c("MCCPC1", "predators", "sur_sal", "sur_temp", "bot_sal", "bot_temp")))

# plot break points
ggplot(break_years) +
  geom_point(aes(Est., Indicator, color = stat_area, shape = stat_area), size = 3) +
  scale_color_grey(name = "Stat area") +
  scale_shape(name = "Stat area") +
  labs(x = "Estimated breakpoint")

Brakpoint analysis of PC1 and PC2
set.seed(8)
# Breakpoint by stat area
pc1_breakpoint <- index_pca %>%
  select(stat_area, Year, PC1, PC2)  %>%
  pivot_longer(cols = c(PC1, PC2), 
               names_to = "Indicator", values_to = "values") %>% 
  nest(data = c(-stat_area, -Indicator))%>% 
  mutate(seg = purrr::map(data, ~bp_analysis(.x)),
         bp = purrr::map(seg, ~data.frame(.x$psi))) %>% 
  unnest(bp) %>% 
  select(stat_area, Indicator, Est., St.Err)

knitr::kable(pc1_breakpoint)
stat_area Indicator Est. St.Err
511 PC1 2006.276 1.520431
511 PC2 1996.371 4.341091
512 PC1 2006.070 1.770614
512 PC2 1996.000 4.377386
513 PC1 2006.000 1.946857
513 PC2 1997.000 4.286314

Lobster Data

ALSI <- read_csv(here::here("Biological_data/SettlementIndex.csv"))
ME_landings <- read_csv(here::here("Biological_data/ME_lob_landings_1950_2019.csv"))
ASFMCindicies <- read_csv(here::here("Biological_data/GOMGBK_indices.csv")) %>% 
  rename("lob_index" = Index, "name" = Survey)
GOMindex <- ASFMCindicies %>% 
  filter(name %in% c("MeFQ2", "MeMQ2"))
NEFSCindex <- ASFMCindicies %>% 
  filter(name %in% c("NefscFQ2", "NefscMQ2"))
sublegal <- readxl::read_excel(here::here("Biological_data/RawData.xlsx")) %>% 
  rename("Year" = year, "Month" = month, "stat_area" = `stat area`)

NEFSC_meIndex <- read_csv(here::here("Biological_data/nmfs_trawl_lobster_indices.csv")) %>% 
  rename("Year" = year, "stat_area" = `stratum id`)

ALSI index

Source: American Lobster Settlement Index data portal Sites:

  • Jonesport, Length: 2002-2018, stat area: 511
  • Mt. Desert Island, Length: 2000-2018, stat area: 512
  • Outer Penobscot Bay, Length: 2000-2018, stat area: 512
  • Mid-coast, Length: 1989-2018, stat area: 513
  • Casco Bay, Length: 2000-2018, stat area: 513
  • York, Length: 2000-2018, stat area: 513

Methods:

  • Sites grouped by NOAA stat area
  • Time series cropped to shortest length
  • Averaged by stat area
statKey <- data.frame("Location" = c('Jonesport',
'Mt. Desert Island',
'Outer Pen Bay',
'Mid-coast',
'Casco Bay',
'York'), "stat_area" = c(511,512,512,513,513,513))

ALSI <- ALSI %>% 
  left_join(statKey, by = "Location") %>% 
  filter(!is.na(stat_area),
         Year >= 2000) %>% 
  group_by(stat_area, Year) %>% 
  summarise(lob_index = mean(Yoy_density), .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area),
         name = "ALSI") %>% 
  na.omit()

ALSI_cors <- ALSI %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])

ALSI_plot <- ALSI %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "ALSI yoy density")

ALSI_plot

Sublegal index

Calculate stratified means
  • Calculate catch per unit effort for each site for each year
  • Multiply cpue by the depth strata area factor
  • Group by stat area, sum the outputs
strata_area_factor <- tibble("stat_area" = c(511,512,513),
                                 "1" = c(0.412162162, 0.409847936, 0.370152761),
                                 "2" = c(0.277027027, 0.28602462, 0.397179788),
                                 "3" = c(0.310810811, 0.304127444, 0.23266745)) %>% 
  pivot_longer(cols = c("1", "2", "3"), names_to = "depth stratum", values_to = "strata_scale") %>% 
  mutate(`depth stratum` = as.numeric(`depth stratum`))

sublegal_cpue <- sublegal %>% 
  group_by(`trip ID`, `trip date`, Year, Month, `site ID`, 
           `depth stratum`, stat_area,
           `soak nights`, depth, `depth unit`, 
           `latitude (dd)`, `longitude (dd)`, `effort ID`) %>% 
  mutate(lob_count = sum(`sample ID` != 0), 
            lob_effort = lob_count/`soak nights`) %>% 
  group_by(`trip ID`, `trip date`, Year, Month, 
           `site ID`, `depth stratum`, stat_area,
           `soak nights`, depth, `depth unit`, 
           `latitude (dd)`, `longitude (dd)`) %>% 
  summarise(cpue = sum(lob_effort)/sum(!is.na(unique(`effort ID`))), .groups = "drop") %>% 
  left_join(strata_area_factor, by = c("stat_area", "depth stratum")) %>% 
  mutate(stratified_cpue = cpue*strata_scale,
         stat_area = as.factor(stat_area)) %>% 
  group_by(stat_area, Year) %>% 
  summarise(lob_index = sum(stratified_cpue), .groups = "drop") %>% 
  mutate(name = "sublegal_cpue")

sub_leagal_cors <- sublegal_cpue %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])
            
sublegal_plot <- sublegal_cpue %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "Sublegal lobster cpue")

sublegal_plot

ME yearly landings

Yearly Maine lobster landings in pounds from 1950-2020.

ME_landings_cors <- ME_landings %>% 
  left_join(index_pca, by = c("Year")) %>% 
  group_by(stat_area) %>% 
  summarise(corPC1 = corrr::correlate(Pounds, PC1)[[2]],
            corPC2 = corrr::correlate(Pounds, PC2)[[2]],
            corPC3 = corrr::correlate(Pounds, PC3)[[2]]) 

ME_pounds <- ME_landings %>% 
  mutate(name = "ME_landings") %>% 
  select(Year, "lob_index" = Pounds, name) 

ME_pounds %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  labs(y = "Maine lobster landings (pounds)")

ME trawl index

ASFMC lobster abundance index based on the Maine trawl survey. Time spans 2003-2018 and indices are split into seasons and sex.

GOM_cors <- GOMindex %>% 
  left_join(index_pca, by = "Year") %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")

GOMindex %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  facet_wrap(~name) +
  labs(y = "ME Trawl lob abundance Index")

Nefsc index

ASFMC lobster abundance index based on the NE fisheries trawl survey. Time spans 1978-2018 and indices are split into seasons and sex.

NEFSC_cors <- NEFSCindex %>% 
  left_join(index_pca, by = "Year") %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")

NEFSCindex %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  facet_wrap(~name) +
  labs(y = "Nefsc trawl lob abundance index")

Biological data analysis

xyPlot <- function(x){
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    ggplot() +
    geom_point(aes(PC1, lob_index, color = stat_area)) + 
    geom_smooth(aes(PC1, lob_index, color = stat_area), method = "lm", se = FALSE) +
    facet_wrap(~name)
}

aicModel <- function(x){
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, PC1, PC2, PC3, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ PC1 + PC2, data = .x), direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>% 
    select(stat_area, stp, model, name) %>% 
    unnest(c(stp, model))
}


##################################################

# break out trawl survey by stat area
# see what groups together to see if there is a distinction in the life history patterns
# Look at coefficients to see if PC1 is always the biggest

ALSI index

Correlation table

ALSI_cors %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 ALSI -0.2865149 0.1741885 -0.0550406
512 ALSI -0.2485525 0.0108428 -0.2707889
513 ALSI -0.4180668 0.2322178 -0.5501655

Scatter plot

xyPlot(ALSI)

AIC lm model selection table

aicSel <- aicModel(ALSI)

knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.0000000 0.0000000 0.4093072 NA NA NA -6.840335 17.68067 18.95878 2.177921 13 14 lob_index ~ 1 ALSI
512 0.0000000 0.0000000 0.3432832 NA NA NA -5.079515 14.15903 15.70421 1.767650 15 16 lob_index ~ 1 ALSI
513 0.1747798 0.1158355 0.4456658 2.965169 0.1070849 1 -8.703791 23.40758 25.72535 2.780652 14 16 lob_index ~ PC1 ALSI

Subleagal index

Correlation table

sub_leagal_cors %>%  
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 sublegal_cpue 0.5997732 0.0818308 0.0970885
512 sublegal_cpue 0.8482231 0.0260158 -0.0875769
513 sublegal_cpue 0.6622880 -0.0034103 0.4042346

Scatter plot

xyPlot(sublegal_cpue)

AIC lm model selection table

aicSel <- aicModel(sublegal_cpue)

knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.3597278 0.2796938 588.3786 4.494687 0.0668182 1 -76.84737 159.6947 160.6025 2769515 8 10 lob_index ~ PC1 sublegal_cpue
512 0.7194824 0.6844177 1526.8310 20.518710 0.0019250 1 -86.38316 178.7663 179.6741 18649704 8 10 lob_index ~ PC1 sublegal_cpue
513 0.4386254 0.3684535 942.6210 6.250733 0.0369336 1 -81.56031 169.1206 170.0284 7108274 8 10 lob_index ~ PC1 sublegal_cpue

ME yearly landings

Correlation table

ME_landings_cors %>%  
  na.omit() %>% 
  knitr::kable()
stat_area corPC1 corPC2 corPC3
511 0.6191539 -0.2215466 0.1403908
512 0.5888759 0.2676040 -0.2320589
513 0.5228336 -0.4517793 0.3534654

Scatter plot

xyPlot(ME_pounds)

AIC lm model selection table

aicSel <- aicModel(ME_pounds)

knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.4324345 0.3990483 26584322 12.95249 6.58e-05 2 -683.4822 1374.964 1381.408 2.402869e+16 34 37 lob_index ~ PC1 + PC2 ME_landings
512 0.4183867 0.3841742 26911304 12.22904 9.97e-05 2 -683.9345 1375.869 1382.313 2.462342e+16 34 37 lob_index ~ PC1 + PC2 ME_landings
513 0.4774595 0.4467218 25508068 15.53336 1.61e-05 2 -681.9531 1371.906 1378.350 2.212249e+16 34 37 lob_index ~ PC1 + PC2 ME_landings

ME Trawl index

Correlation table

GOM_cors %>% 
  filter(name %in% c("MeFQ2", "MeFQ4", "MeMQ2", "MeMQ4")) %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 MeFQ2 0.8503890 -0.3114242 -0.1412119
511 MeMQ2 0.8694017 -0.3283094 -0.0944283
512 MeFQ2 0.8349252 0.0629934 -0.1333419
512 MeMQ2 0.8700507 0.0538142 -0.1017580
513 MeFQ2 0.8151903 -0.1060868 0.4868037
513 MeMQ2 0.8421315 -0.0901881 0.4704269

Scatter plot

xyPlot(GOMindex)

AIC lm model selection table

aicSel <- aicModel(GOMindex)

knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.7231615 0.6979944 14.25576 28.73436 0.0002301 1 -51.90344 109.8069 111.5017 2235.492 11 13 lob_index ~ PC1 MeFQ2
512 0.6971001 0.6695637 14.91168 25.31562 0.0003831 1 -52.48823 110.9765 112.6713 2445.941 11 13 lob_index ~ PC1 MeFQ2
513 0.6645352 0.6340384 15.69280 21.79032 0.0006847 1 -53.15198 112.3040 113.9988 2708.905 11 13 lob_index ~ PC1 MeFQ2
511 0.7558594 0.7336647 13.93372 34.05600 0.0001132 1 -51.60640 109.2128 110.9076 2135.633 11 13 lob_index ~ PC1 MeMQ2
512 0.7569882 0.7348962 13.90146 34.26529 0.0001103 1 -51.57627 109.1525 110.8474 2125.758 11 13 lob_index ~ PC1 MeMQ2
513 0.7091855 0.6827479 15.20740 26.82480 0.0003041 1 -52.74351 111.4870 113.1819 2543.914 11 13 lob_index ~ PC1 MeMQ2

NEFSC trawl index

Correlation table

NEFSC_cors %>% 
  filter(name %in% c("NefscFQ2", "NefscFQ4", "NefscMQ2", "NefscMQ4")) %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 NefscFQ2 0.6360510 -0.1742111 0.2511408
511 NefscMQ2 0.6037032 -0.2228239 0.3557187
512 NefscFQ2 0.6156753 0.1459083 -0.1618782
512 NefscMQ2 0.6359954 0.1370553 -0.0749031
513 NefscFQ2 0.5471681 -0.3412702 0.3701525
513 NefscMQ2 0.5714269 -0.3316981 0.3132527

Scatter plot

xyPlot(NEFSCindex)

AIC lm model selection table

aicSel <- aicModel(NEFSCindex)

knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.4045609 0.3875483 1.620595 23.78015 0.0000233 1 -69.33603 144.6721 149.5048 91.92145 35 37 lob_index ~ PC1 NefscFQ2
512 0.3790561 0.3613148 1.654939 21.36580 0.0000499 1 -70.11195 146.2239 151.0567 95.85878 35 37 lob_index ~ PC1 NefscFQ2
513 0.4158582 0.3814970 1.628581 12.10252 0.0001074 2 -68.98166 145.9633 152.4070 90.17741 34 37 lob_index ~ PC1 + PC2 NefscFQ2
511 0.4141080 0.3796438 1.186011 12.01559 0.0001130 2 -57.24845 122.4969 128.9406 47.82516 34 37 lob_index ~ PC1 + PC2 NefscMQ2
512 0.4044902 0.3874756 1.178501 23.77317 0.0000233 1 -57.54968 121.0994 125.9321 48.61024 35 37 lob_index ~ PC1 NefscMQ2
513 0.4365523 0.4034083 1.163072 13.17139 0.0000581 2 -56.52583 121.0517 127.4953 45.99308 34 37 lob_index ~ PC1 + PC2 NefscMQ2

Cluster Analysis

GOMindex_ca <- GOMindex %>% 
  filter(Season == 2) %>% 
  select(Year, lob_index, name)

NEFSCindex_ca <- NEFSCindex %>% 
  filter(Season == 2) %>% 
  select(Year, lob_index, name)

lobData <- bind_rows(ALSI, sublegal_cpue, ME_pounds, GOMindex_ca, NEFSCindex_ca) %>% 
  pivot_wider(names_from = c(name, stat_area),
              values_from = lob_index) %>% 
  na.omit() %>% 
  column_to_rownames("Year") 

# standardize variables
lobData <- scale(lobData) %>% 
  data.frame()

lobData_t <- data.table::transpose(lobData)

# get row and colnames in order
colnames(lobData_t) <- rownames(lobData)
rownames(lobData_t) <- colnames(lobData)



# determine number of clusters
wss <- (nrow(lobData)-1)*sum(apply(lobData,2,var)) # get sum of squares

for (i in 2:12) wss[i] <- sum(kmeans(lobData,
   centers=i)$withinss)

# look for break in plot like a scree plot
plot(1:12, wss, type="b", xlab="Number of Clusters",
  ylab="Within groups sum of squares")

fpc::pamk(lobData)
## $pamobject
## Medoids:
##      ID   ALSI_511   ALSI_512   ALSI_513 sublegal_cpue_511 sublegal_cpue_512
## 2008  3  0.2773898  0.3727694  0.8306286        -0.8219887        -0.9264162
## 2017 12 -0.8002855 -0.6249370 -0.5819441         0.6768536         1.1938697
##      sublegal_cpue_513 ME_landings_NA   MeFQ2_NA  MeMQ2_NA NefscFQ2_NA
## 2008        -0.9486334     -1.4127242 -1.1595342 -1.166756   -1.141641
## 2017         0.8440615      0.3026508  0.9815946  0.931201    1.827133
##      NefscMQ2_NA
## 2008   -1.345064
## 2017    1.080726
## Clustering vector:
## 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 
##    1    1    1    1    1    1    2    2    2    2    2    2    2 
## Objective function:
##    build     swap 
## 2.069411 1.959058 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.4911826 0.3453511 0.2884169 0.2354232 0.2033810 0.1929705
##  [8] 0.1457884 0.1594838 0.1417225
# K-Means Cluster Analysis
fit <- kmeans(lobData, 2) # 3 cluster solution
# get cluster means
aggregate(lobData,by=list(fit$cluster),FUN=mean)
##   Group.1   ALSI_511   ALSI_512   ALSI_513 sublegal_cpue_511 sublegal_cpue_512
## 1       1  0.7126048  0.7845213  0.8609674        -0.7666360        -0.8196149
## 2       2 -0.6108041 -0.6724468 -0.7379720         0.6571166         0.7025270
##   sublegal_cpue_513 ME_landings_NA   MeFQ2_NA   MeMQ2_NA NefscFQ2_NA
## 1        -0.8337142     -0.9249083 -0.8452135 -0.8586839  -0.9237534
## 2         0.7146122      0.7927785  0.7244687  0.7360148   0.7917886
##   NefscMQ2_NA
## 1  -0.8807332
## 2   0.7549141
# append cluster assignment
lobData_cluster <- data.frame(lobData, fit$cluster)

# Cluster Plot against 1st 2 principal components

# vary parameters for most readable graph
cluster::clusplot(lobData, fit$cluster, color=TRUE, shade=TRUE,
   labels=2, lines=0)

# Centroid Plot against 1st 2 discriminant functions
fpc::plotcluster(lobData, fit$cluster)